library(pacman)

p_load(tidyverse, data.table, edgeR, ggplot2, hrbrthemes, viridis, scales, rmarkdown)

doc_theme <- theme_ipsum(
    base_family = "Arial", 
    caption_margin = 12,
    axis_title_size = 12,
    axis_col = "black")

curated_names <- fread(
    "../../curated_names.tsv")

aba.bed <- fread(
    "../../CP046654.1.bed",
    col.names = c(
        "chromosome",
        "left",
        "right",
        "locus_tag",
        "gene_name",
        "strand",
        "coding",
        "completeness"))

aba.counts <- fread(
    "../../all_counts_seal.tsv.gz",
    header = FALSE,
    col.names = c(
        "spacer",
        "count",
        "condition"))

aba.key <- fread(
    "../../aba_key.tsv")

aba.design <- fread(
    "../../ABA1_experimental_design.tsv",
    na.strings = c("NA"))

aba.genome <- aba.key %>% 
    group_by(locus_tag) %>% 
    select(
        locus_tag,
        spacer, 
        type, 
        y_pred, 
        target, 
        offset) %>% 
    inner_join(
        curated_names, 
        by = c("locus_tag" = "AB19606")) %>% 
    rename(AB19606 = locus_tag)

# define the experimental design space to only take into consideration "tubes"
aba.design <- aba.design %>%
    filter(experiment == "tube")

# keep only the counts that are in the experimental design space
aba.counts <- aba.counts %>% semi_join(aba.design)
## Joining, by = "condition"

Helper function to convert from dataframe to data.matrix

data.matrix.withnames <- function(x) {
    m <- data.matrix(x[,-1], rownames.force = TRUE)
    rownames(m) <- as.data.frame(x)[,1]
    return(m)
}

Build up components of bottleneck calculation

# https://www.nature.com/articles/nmeth.3253
aba.design <- aba.design %>% 
    mutate(generations = case_when(
        timing == "T0" ~ 0,
        timing == "T1" ~ 9, 
        timing == "T2" ~ 18))

aba.counts.verbose <- aba.counts %>%
    inner_join(aba.design) %>% 
    inner_join(aba.key) %>%
    unite("Sample", drug, dose, timing, sep = "_") %>% 
    unite("Sample",    Sample, rep, sep = "_", remove = FALSE) %>% 
    select(type, spacer, count, Sample, Sample, generations) %>%
    group_by(type) %>%
    nest %>% 
    mutate(
        data = map(
            data, ~pivot_wider(
                .x, 
                id_cols = spacer, 
                names_from = Sample, 
                values_from = count, 
                values_fill = 0)),
        data_matrix = map(
            data, ~data.matrix.withnames(.x)))

aba.group <- aba.design %>%
    as_tibble %>%
    unite("Sample", drug, dose, timing, sep = "_") %>% 
    pull(Sample) %>% 
    factor

aba.permut <- model.matrix( ~ 0 + aba.group)

colnames(aba.permut) <- levels(aba.group)
aba.edgeR <- aba.counts.verbose %>% 
    mutate(
        y = map(
            data_matrix, 
            ~DGEList(counts = ., group = aba.group, genes = row.names(.))),
        keep = map(
            y,
            ~filterByExpr(y = ., design = aba.permut, group = aba.group)),
        y = map2(
            y, 
            keep, 
            ~.x[.y, , keep.lib.sizes = FALSE]),
        y = map(
            y,
            ~calcNormFactors(.)),
        y = map(
            y,
            ~estimateDisp(., aba.permut)),
        fit = map(
            y, 
            ~glmQLFit(., aba.permut, robust = TRUE)))

Under this paradigm, the fitted values should be equal due to model fitting between replicates.

Thus, we can throw away one replicate and just keep rep. 1 to exhibit the fitted CPM.

aba.cpm <- aba.edgeR %>%
    mutate(
        cpm.fit = map2(
            fit, 
            y, 
            #~cpm(y = .x$fitted.values)),
            ~cpm(y = .x$fitted.values, lib.size = exp(getOffset(.y)))),
        cpm.fit = map(
            cpm.fit, 
            ~as_tibble(., rownames = "spacer")),
        cpm.fit = map(
            cpm.fit,
            ~pivot_longer(.x, !spacer, names_to = "Sample", values_to = "Counts per Million (Fitted)")),
        cpm.fit = map(
            cpm.fit,
            ~filter(.x, Sample %in% c(
                "None_0_T0_1",
                "None_0_T1_1",
                "None_0_T2_1"))),
        cpm.fit = map(cpm.fit, ~.x %>% mutate(Sample = gsub("_[0-9]$", "", Sample))),
        cpm.fit = map(cpm.fit, ~.x %>% select(Sample, spacer, `Counts per Million (Fitted)`)),
        cpm.fit = map(cpm.fit, ~.x %>% mutate(Sample = case_when(
            Sample == "None_0_T0" ~ "Uninduced",
            Sample == "None_0_T1" ~ "Induced for 18 hours",
            Sample == "None_0_T2" ~ "Induced for 36 hours"))),
        cpm.fit = map(cpm.fit, ~.x %>% mutate(Sample = factor(
            Sample,
            levels = c(
                "Uninduced",
                "Induced for 18 hours",
                "Induced for 36 hours")))),
        Type = type,
        # Type = case_when(
        #   type == "control" ~ "Non-targeting Guides",
        #   type == "mismatch" ~ "Mismatched Guides",
        #   type == "perfect" ~ "Perfect Guides"),
        Type = factor(
            Type,
            levels = c(
                "control",
                "mismatch",
                "perfect")))

Do stats to normalize by z-score and change to distribution of controls. https://stats.stackexchange.com/questions/46429/transform-data-to-desired-mean-and-standard-deviation

# aba.cpm <- aba.cpm %>%
#   mutate(
#       summary = map(
#         cpm.fit, 
#         ~.x %>% group_by(Sample) %>% 
#             summarise(
#                 mean = mean(`Counts per Million (Fitted)`) %>% round(3),
#                 med = median(`Counts per Million (Fitted)`) %>% round(3),
#                 min = min(`Counts per Million (Fitted)`) %>% round(3),
#                 max = max(`Counts per Million (Fitted)`) %>% round(3),
#                 sd = sd(`Counts per Million (Fitted)`) %>% round(3))),
#       cpm.fit = map2(
#           cpm.fit,
#           summary,
#           ~inner_join(.x, .y) %>% mutate(z = (`Counts per Million (Fitted)` - mean) / sd )))
# 
# aba.cpm <- aba.cpm %>%
#   mutate(
#       cpm.fit = map(
#           cpm.fit, 
#           ~inner_join(
#               .x, 
#               aba.cpm %>% ungroup %>% filter(type == "control") %>% select(summary) %>% unnest(c(summary)), 
#               by = "Sample")))
# 
# aba.cpm <- aba.cpm %>% mutate(cpm.fit = map(cpm.fit, ~.x %>% mutate(`Normalized Counts per Million (Fitted)` = z * sd.y + mean.y)))
aba.cpm %>% 
    unnest(cpm.fit) %>% 
    select(Type, Sample, `Counts per Million (Fitted)`) %>% 
    ggplot(aes(x = `Counts per Million (Fitted)`, fill = `Sample`)) + 
    geom_density(lwd = 1, alpha = 0.5) + 
    scale_fill_manual(values = RColorBrewer::brewer.pal(6,"Paired")[c(TRUE, FALSE)]) +
#   scale_fill_viridis(
#     discrete = TRUE, 
#     alpha = 0.7, 
#     direction = 1, 
#     option = "inferno") + 
    scale_x_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number(scale_cut = cut_short_scale())) +
    facet_wrap(~ Type) + 
    doc_theme
## Adding missing grouping variables: `type`

ggsave(
    "Induction_Density.svg", 
    width = 12, 
    height = 3, 
    dpi = 600, 
    units = "in", 
    device = "svg")
cpm.max <- aba.cpm %>% 
    ungroup %>% 
    select(cpm.fit) %>% 
    unnest(cols = "cpm.fit") %>% 
    select(`Counts per Million (Fitted)`) %>% max

aba.cpm.t <- aba.cpm %>%
    mutate(
        plot = map2(
            cpm.fit,
            Type,
            ~ggplot(
                .x,
                aes(x = `Counts per Million (Fitted)`, fill = `Sample`)) +
                geom_density() +
                ggtitle(.y) +
                doc_theme + 
                scale_fill_viridis(
        discrete = TRUE, 
        alpha = 0.75, 
        direction = 1, 
        option = "inferno") +
    scale_x_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number(scale_cut = cut_short_scale()),
        limits = c(0, cpm.max))))

print(aba.cpm.t$plot)
## [[1]]

## 
## [[2]]

## 
## [[3]]

cpm.max <- aba.cpm %>% 
    ungroup %>% 
    select(cpm.fit) %>% 
    unnest(cols = "cpm.fit") %>% 
    select(`Counts per Million (Fitted)`) %>% max

aba.cpm.t <- aba.cpm %>%
    unnest(cols = c(cpm.fit)) %>%
    group_by(Sample) %>% 
    select(`Type`, `Sample`, spacer, `Counts per Million (Fitted)`) %>% 
    group_by(Sample) %>% 
    nest %>%
    mutate(
        plot = map2(
            data,
            `Sample`,
            ~ggplot(
                .x,
                aes(x = `Counts per Million (Fitted)`, fill = `Type`)) +
                geom_density() +
                ggtitle(.y) +
                doc_theme +
    scale_fill_viridis(
        discrete = TRUE, 
        alpha = 0.3, 
        direction = -1, 
        option = "magma") +
    scale_x_continuous(
        trans = scales::pseudo_log_trans(base = 10),
        breaks = c(0, 10^(1:6)),
        labels = label_number(scale_cut = cut_short_scale()),
        limits = c(0, cpm.max))))

print(aba.cpm.t$plot)
## [[1]]

## 
## [[2]]

## 
## [[3]]

Samples that we’re interested in measuring the differences between

conditions.full <- c(
    "None_0_T1 - None_0_T0",
    "None_0_T2 - None_0_T0",
    "Colistin_0.44_T1 - None_0_T0",
    "Colistin_0.44_T2 - None_0_T0",
    "Rifampicin_0.34_T1 - None_0_T0",
    "Rifampicin_0.34_T2 - None_0_T0",
    "Imipenem_0.06_T1 - None_0_T0",
    "Imipenem_0.06_T2 - None_0_T0",
    "Imipenem_0.09_T1 - None_0_T0",
    "Meropenem_0.11_T1 - None_0_T0",
    "Meropenem_0.11_T2 - None_0_T0",
    "Meropenem_0.17_T1 - None_0_T0",
    "Meropenem_0.17_T2 - None_0_T0")

conditions.induction <- c(
    "Colistin_0.44_T1 - None_0_T1",
    "Colistin_0.44_T2 - None_0_T2",
    "Rifampicin_0.34_T1 - None_0_T1",
    "Rifampicin_0.34_T2 - None_0_T2",
    "Imipenem_0.06_T1 - None_0_T1",
    "Imipenem_0.06_T2 - None_0_T2",
    "Imipenem_0.09_T1 - None_0_T1",
    "Imipenem_0.09_T2 - None_0_T2",
    "Meropenem_0.11_T1 - None_0_T1",
    "Meropenem_0.11_T2 - None_0_T2",
    "Meropenem_0.17_T1 - None_0_T1",
    "Meropenem_0.17_T2 - None_0_T2")

conditions.all <- c(conditions.full, conditions.induction)

aba.contrasts <-  makeContrasts(
    contrasts = conditions.all,
    levels = aba.permut)

Perform contrasts and find LFC and FDR

aba.edgeR <- aba.edgeR %>%
    mutate(
        results = map(
            fit,
            ~glmQLFTest(., contrast = aba.contrasts)),
        results = map(
            results,
            ~topTags(., n = Inf)),
        results = map(
            results, ~as_tibble(.) %>% pull(table)),
        results = map(
            results, ~rename(., spacer = genes)),
        results = map(
            results, 
            ~pivot_longer(
                ., 
                !c(spacer, F, PValue, FDR, logCPM), 
                names_to = "contrast", 
                values_to = "logFC")),
        results = map(
            results,
            ~inner_join(
                .,
                aba.key %>% select(y_pred, original, locus_tag, offset, spacer))),
        results = map(
            results, 
            ~.x %>% mutate(contrast = gsub("\\.\\.\\.", " - ", contrast))),
        results = map(
            results, 
            ~.x %>% mutate(contrast = gsub("logFC\\.", "", contrast))),
        summary = map(
            results, 
            ~.x %>% group_by(contrast) %>% 
                summarise(
                    mean = mean(logFC) %>% round(3),
                    med = median(logFC) %>% round(3),
                    min = min(logFC) %>% round(3),
                    max = max(logFC) %>% round(3),
                    sd = sd(logFC) %>% round(3) )),
        summary.paged = map(
            summary, 
            ~paged_table(.)))
## Joining, by = "spacer"
## Joining, by = "spacer"
## Joining, by = "spacer"
print(aba.edgeR$type)
## [1] "mismatch" "perfect"  "control"
print(aba.edgeR$summary.paged)
## [[1]]
##                          contrast   mean    med     min   max    sd
## 1    Colistin_0.44_T1 - None_0_T0 -0.610  0.002 -11.153 2.306 1.423
## 2    Colistin_0.44_T1 - None_0_T1 -0.380 -0.012  -8.492 2.313 0.950
## 3    Colistin_0.44_T2 - None_0_T0 -1.981  0.073 -16.024 4.483 4.045
## 4    Colistin_0.44_T2 - None_0_T2 -0.767  0.000 -13.714 5.034 2.121
## 5    Imipenem_0.06_T1 - None_0_T0 -0.281  0.007  -6.642 1.207 0.797
## 6    Imipenem_0.06_T1 - None_0_T1 -0.050  0.018  -4.735 1.423 0.502
## 7    Imipenem_0.06_T2 - None_0_T0 -1.431  0.057 -15.434 1.821 2.903
## 8    Imipenem_0.06_T2 - None_0_T2 -0.217  0.020 -10.257 3.698 1.170
## 9    Imipenem_0.09_T1 - None_0_T0 -0.282  0.013  -7.589 1.638 0.891
## 10   Imipenem_0.09_T1 - None_0_T1 -0.052  0.020  -5.682 1.872 0.659
## 11   Imipenem_0.09_T2 - None_0_T2 -0.288  0.017 -12.574 4.461 1.478
## 12  Meropenem_0.11_T1 - None_0_T0 -0.242 -0.031  -5.861 1.241 0.640
## 13  Meropenem_0.11_T1 - None_0_T1 -0.012 -0.002  -4.034 1.718 0.324
## 14  Meropenem_0.11_T2 - None_0_T0 -1.506  0.016 -15.434 2.230 3.104
## 15  Meropenem_0.11_T2 - None_0_T2 -0.292  0.001  -9.671 4.238 1.340
## 16  Meropenem_0.17_T1 - None_0_T0 -0.221 -0.020  -6.323 1.701 0.674
## 17  Meropenem_0.17_T1 - None_0_T1  0.009  0.005  -4.360 2.305 0.431
## 18  Meropenem_0.17_T2 - None_0_T0 -1.607 -0.004 -15.654 2.559 3.285
## 19  Meropenem_0.17_T2 - None_0_T2 -0.393 -0.003 -10.836 5.312 1.619
## 20          None_0_T1 - None_0_T0 -0.230 -0.021  -4.960 0.715 0.555
## 21          None_0_T2 - None_0_T0 -1.214  0.040 -14.465 1.169 2.387
## 22 Rifampicin_0.34_T1 - None_0_T0 -0.173 -0.004  -4.519 0.407 0.489
## 23 Rifampicin_0.34_T1 - None_0_T1  0.057  0.025  -2.478 1.618 0.276
## 24 Rifampicin_0.34_T2 - None_0_T0 -1.500 -0.089 -14.416 4.346 3.075
## 25 Rifampicin_0.34_T2 - None_0_T2 -0.286 -0.048  -9.246 5.597 1.599
## 
## [[2]]
##                          contrast   mean    med     min   max    sd
## 1    Colistin_0.44_T1 - None_0_T0 -0.846 -0.038  -9.907 1.514 2.016
## 2    Colistin_0.44_T1 - None_0_T1 -0.591 -0.105  -6.993 1.593 1.324
## 3    Colistin_0.44_T2 - None_0_T0 -3.172 -1.846 -15.106 5.213 5.039
## 4    Colistin_0.44_T2 - None_0_T2 -1.716 -0.857 -13.531 4.594 2.734
## 5    Imipenem_0.06_T1 - None_0_T0 -0.367  0.062  -6.966 1.594 1.146
## 6    Imipenem_0.06_T1 - None_0_T1 -0.113  0.021  -5.103 1.470 0.729
## 7    Imipenem_0.06_T2 - None_0_T0 -1.856 -0.939 -14.701 3.312 3.715
## 8    Imipenem_0.06_T2 - None_0_T2 -0.400  0.027  -9.852 5.400 1.612
## 9    Imipenem_0.09_T1 - None_0_T0 -0.386  0.049  -7.180 2.036 1.290
## 10   Imipenem_0.09_T1 - None_0_T1 -0.132  0.008  -5.316 1.956 0.958
## 11   Imipenem_0.09_T2 - None_0_T2 -0.542 -0.003 -11.461 3.937 1.987
## 12  Meropenem_0.11_T1 - None_0_T0 -0.271  0.009  -7.674 1.595 0.934
## 13  Meropenem_0.11_T1 - None_0_T1 -0.016 -0.002  -4.006 1.644 0.464
## 14  Meropenem_0.11_T2 - None_0_T0 -2.069 -0.944 -14.701 4.058 3.976
## 15  Meropenem_0.11_T2 - None_0_T2 -0.613 -0.107  -9.852 4.533 1.796
## 16  Meropenem_0.17_T1 - None_0_T0 -0.285  0.022  -6.566 2.098 0.991
## 17  Meropenem_0.17_T1 - None_0_T1 -0.030 -0.025  -4.461 2.172 0.639
## 18  Meropenem_0.17_T2 - None_0_T0 -2.229 -1.188 -14.701 4.982 4.162
## 19  Meropenem_0.17_T2 - None_0_T2 -0.773 -0.245 -10.259 5.915 2.114
## 20          None_0_T1 - None_0_T0 -0.254  0.064  -7.924 1.002 0.830
## 21          None_0_T2 - None_0_T0 -1.456 -0.501 -12.456 2.511 3.071
## 22 Rifampicin_0.34_T1 - None_0_T0 -0.211  0.108  -5.899 0.747 0.788
## 23 Rifampicin_0.34_T1 - None_0_T1  0.044  0.021  -2.744 2.024 0.428
## 24 Rifampicin_0.34_T2 - None_0_T0 -2.009 -1.123 -14.440 5.162 3.828
## 25 Rifampicin_0.34_T2 - None_0_T2 -0.553 -0.194  -8.684 7.175 2.107
## 
## [[3]]
##                          contrast   mean    med    min   max    sd
## 1    Colistin_0.44_T1 - None_0_T0 -0.007 -0.001 -1.800 1.405 0.156
## 2    Colistin_0.44_T1 - None_0_T1 -0.008 -0.006 -0.991 1.291 0.149
## 3    Colistin_0.44_T2 - None_0_T0 -0.003  0.011 -8.500 3.440 0.395
## 4    Colistin_0.44_T2 - None_0_T2  0.004  0.003 -6.549 3.240 0.345
## 5    Imipenem_0.06_T1 - None_0_T0 -0.002 -0.002 -1.946 0.874 0.127
## 6    Imipenem_0.06_T1 - None_0_T1 -0.002 -0.004 -0.844 0.901 0.100
## 7    Imipenem_0.06_T2 - None_0_T0 -0.006 -0.002 -6.341 2.089 0.297
## 8    Imipenem_0.06_T2 - None_0_T2  0.001  0.002 -2.179 1.869 0.176
## 9    Imipenem_0.09_T1 - None_0_T0  0.000 -0.001 -1.932 1.286 0.147
## 10   Imipenem_0.09_T1 - None_0_T1  0.000 -0.002 -0.935 1.313 0.124
## 11   Imipenem_0.09_T2 - None_0_T2  0.007  0.000 -3.375 2.626 0.270
## 12  Meropenem_0.11_T1 - None_0_T0  0.006  0.004 -1.181 0.548 0.130
## 13  Meropenem_0.11_T1 - None_0_T1  0.005  0.001 -0.517 0.545 0.117
## 14  Meropenem_0.11_T2 - None_0_T0 -0.003  0.010 -5.389 1.799 0.324
## 15  Meropenem_0.11_T2 - None_0_T2  0.004  0.005 -1.483 1.977 0.233
## 16  Meropenem_0.17_T1 - None_0_T0  0.003 -0.002 -1.088 1.112 0.141
## 17  Meropenem_0.17_T1 - None_0_T1  0.002 -0.001 -0.693 0.779 0.125
## 18  Meropenem_0.17_T2 - None_0_T0  0.000 -0.002 -5.659 2.483 0.427
## 19  Meropenem_0.17_T2 - None_0_T2  0.007  0.005 -1.969 2.662 0.346
## 20          None_0_T1 - None_0_T0  0.001  0.002 -1.102 0.677 0.102
## 21          None_0_T2 - None_0_T0 -0.007  0.002 -4.162 1.043 0.214
## 22 Rifampicin_0.34_T1 - None_0_T0  0.002  0.000 -0.552 0.749 0.087
## 23 Rifampicin_0.34_T1 - None_0_T1  0.001  0.002 -0.462 0.958 0.098
## 24 Rifampicin_0.34_T2 - None_0_T0 -0.015  0.017 -9.194 4.165 0.768
## 25 Rifampicin_0.34_T2 - None_0_T2 -0.008  0.026 -9.216 3.588 0.753

Transpose results: Ungroup from type, then regroup results by contrast group

# aba.edgeR.t <- aba.edgeR %>% 
#   select(type, results) %>% 
#   unnest(results) %>% 
#   group_by(contrast) %>% 
#   nest %>%
#   mutate(
#       plot = map2(
#           data,
#           `contrast`,
#           ~ggplot(
#               .x,
#               aes(x = `logFC`, fill = `type`)) +
#               geom_density() +
#               ggtitle(.y) +
#               theme_ipsum() +
#   scale_fill_viridis(
#       discrete = TRUE, 
#       alpha = 0.3, 
#       direction = -1, 
#       option = "magma")))
# 
# print(aba.edgeR.t$plot)
# aba.edgeR.prior <- fread("../../Results/melted_results.tsv.gz") %>% 
#   filter(condition %in% c("None_0_T1 - None_0_T0", "None_0_T2 - None_0_T0")) %>% 
#   select(type, condition, LFC, LFC.adj) %>% 
#   group_by(condition) %>% 
#   nest %>% 
#   mutate(
#       plot = map2(
#           data,
#           `condition`,
#           ~ggplot(
#               .x,
#               aes(x = `LFC.adj`, fill = `type`)) +
#               geom_density() +
#               ggtitle(.y) +
#               theme_ipsum() +
#               scale_fill_viridis(
#                   discrete = TRUE,
#                   alpha = 0.3,
#                   direction = -1,
#                   option = "magma") +
#               xlim(
#                   fread("../../Results/melted_results.tsv.gz") %>%
#                       filter(condition %in% c("None_0_T1 - None_0_T0", "None_0_T2 - None_0_T0")) %>%
#                       select(type, condition, LFC, LFC.adj) %>% select(LFC.adj) %>% range)))
# 
# print(aba.edgeR.prior$plot)

Show replicates and log2 fold change with respect to T0.

aba.counts.stats <- aba.counts.verbose %>% 
    select(-data_matrix) %>% 
    unnest(data) %>% 
    pivot_longer(
        !c(type, spacer), 
        names_to = "condition", 
        values_to = "count")

aba.counts.stats <- aba.counts.stats %>% 
    inner_join(
        aba.counts.stats %>%
            filter(condition == "None_0_T0_1") %>%
            ungroup %>%
            rename(base_count = count) %>%
            select(spacer, base_count)) %>% 
    mutate(logFC = log2(count/base_count)) %>%
    rename(sample = condition)
## Joining, by = "spacer"
aba.counts.stats <- aba.counts.stats %>% 
    inner_join(
        aba.counts.stats %>% 
            group_by(sample) %>% 
            filter(type == "control") %>% 
            summarise(logFC.med.ctrl = median(logFC))) %>%
    mutate(logFC.adj = logFC - logFC.med.ctrl) %>%
    separate(
        sample, c("drug", "dose", "timing", "rep"), "_") %>%
    mutate(rep = paste("Rep", rep)) %>%
    mutate(dose.with_units = paste0(dose,"ng/μL")) %>%
    unite(
        "Sample", drug, dose.with_units, timing, remove = F) %>% 
    group_by(Sample)
## Joining, by = "sample"
logFC.range <- aba.counts.stats %>% filter(!is.infinite(logFC.adj)) %>% pull(logFC.adj) %>% range

aba.counts.stats.reps <- aba.counts.stats %>% 
    mutate(Type = factor(type)) %>%
    arrange(desc(Type)) %>%
    nest %>% 
    mutate(data = map(
        data,
        ~.x %>% pivot_wider(
            id_cols = c(Type, spacer), 
            names_from = rep, 
            values_from = logFC.adj))) %>%
    filter(Sample != "None_0ng/μL_T0") %>% 
    mutate(
        data.finite = map(
            data, ~.x %>% filter(is.finite(`Rep 1`) & is.finite(`Rep 2`))), 
        correlation = map_dbl(data.finite, ~cor(.x$`Rep 1`, .x$`Rep 2`)),
        Title = paste0(
            "Log2FC: ", gsub("_", " ", Sample), 
            "(Cor=", round(correlation, 3), ")"))
    

aba.counts.stats.reps <- aba.counts.stats.reps %>%
    mutate(plot = map2(
        data,
        `Title`,
        ~ggplot(
            .x,
            aes(x = `Rep 1`, y = `Rep 2`)) +
            geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "red") +
            geom_point(aes(color = Type), shape = 20) +
            ggtitle(.y) +
            doc_theme +
            xlim(logFC.range) +
            ylim(logFC.range) +
            # scale_alpha_manual(values = c(0.5, 0.15, 0.25)) +
            scale_color_viridis(
                discrete = TRUE,
                alpha = c(0.15, 0.25, 0.35),
                direction = -1,
                option = "viridis") +
            theme(
                legend.position = "bottom",
                legend.key = element_rect(fill = "#ECECEC")) +
            guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5))))) 

aba.counts.stats.reps %>% pull(plot) %>% print
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

########

aba.counts.stats.reps %>% 
    filter(Sample %like% "None") %>% 
    mutate(Title = gsub("Log2FC: None 0ng/μL ", "", Title)) %>%
    mutate(Title = gsub("Cor=", "", Title)) %>%
    mutate(Title = gsub("\\(", "~(R^2~", Title)) %>%
    unnest(data) %>% 
    ggplot(
        aes(x = `Rep 1`, y = `Rep 2`)) +
    geom_abline(intercept = 0, slope = 1, size = 0.5, colour = "black") +
    geom_point(aes(color = Type), cex = 2) +
    facet_wrap(~Title, labeller = label_parsed) +
    scale_colour_manual(
        values =
            c("control" = alpha("grey", 0.5), 
              "mismatch" = alpha("#1F78B4", 0.5),
              "perfect" = alpha("#E31A1C", 0.5)))  + 
    doc_theme +
    theme(legend.position = "none")

Violin Plots

aba.counts.violin <- aba.counts.stats %>% 
    mutate(Log2FC = logFC.adj) %>%
    mutate(rep = paste("Rep", rep)) %>%
    mutate(dose.with_units = paste0(dose,"ng/μL")) %>%
    unite(
        "Sample", drug, dose.with_units, timing, remove = F, sep = " ") %>% 
    filter((Sample %like% "None") & timing != "T0") %>% 
        mutate(
            Sample = gsub("^", "Induction + ", Sample),
            Sample = gsub(" \\+ None 0ng/μL", "", Sample),
            Sample = gsub("(T[1,2])", "at \\1", Sample),
            Sample = factor(Sample, levels = unique(Sample)),
            Sample = factor(Sample, levels = rev(levels(Sample))),
            Type = factor(type),
            Type = factor(Type, levels = rev(levels(Type)))) %>%
    ggplot(aes(x = Sample, y = Log2FC, fill = Type)) + 
    geom_violin(alpha = 1, width = 0.5) + 
    doc_theme + 
    scale_fill_manual(values = rev(RColorBrewer::brewer.pal(6,"Paired")[c(TRUE, FALSE)])) +
        coord_flip() +
      guides(fill = guide_legend(reverse = TRUE)) +
    ggtitle("Distribution of Guide Depletion")

print(aba.counts.violin)
## Warning: Removed 46 rows containing non-finite values (stat_ydensity).

melted_results <- fread("../../Results/melted_results.tsv.gz")
interest <- fread("../../interest.tsv", sep = "\t")

melted_results %>% 
    filter(condition %in% interest$condition) %>% 
    filter(shift %like% "None") %>% 
    mutate(
        `Fold-change` = 2^LFC.adj, 
        Type = type, condition = case_when(
            condition %like% "T1" ~ "T1", 
            condition %like% "T2" ~ "T2")) %>%  
    ggplot(aes(x = `Fold-change`)) + 
    geom_density(
        aes(fill = Type), 
        alpha = 0.5) + 
    facet_wrap(facets = c("condition"), ncol = 2) + 
    doc_theme +
    scale_fill_manual(
        values =
            c("control" = "grey", 
                "mismatch" = "#1F78B4",
                "perfect" = "#E31A1C"))